package Markdent::Handler::TableOfContents;

use strict; 
use warnings;

use URI;
use Moose;
use GoogleWiki2Markdown;
use File::Slurp;
use constant::boolean;
use File::Spec::Functions qw(catfile);
use Log::Log4perl qw(:easy);
use Data::Dumper;

# Consume the Markdent role this handler is built on.
# NOTE(review): presumably the role routes each parse event to a method of the
# same name (start_link, text, end_document, ...) -- confirm against the
# Markdent documentation.
with 'Markdent::Role::EventsAsMethods';

# use autoload so we don't have to implement a handler for all possible parse events

our $AUTOLOAD;

# Fallback for any method call that has no explicit implementation in this
# package: the fully qualified name of the requested method is written to the
# debug log and nothing else happens.
sub AUTOLOAD {
    my $requested = $AUTOLOAD;
    return DEBUG(sprintf("unhandled parse event %s", $requested));
}

#----------------------------------------------------------------------
# public methods
#----------------------------------------------------------------------

# Constructor.
#
#   $class           - class to bless the new handler into
#   $working_dir     - directory containing the Google Wiki files
#   @reference_files - files that must be run through the reference-page
#                      reformatting when they are pulled into the output
#
# Returns a blessed hash holding the handler's parse state.
sub new {
    my ($class, $working_dir, @reference_files) = @_;
    my $self = {
        list_level => 0,                       # current depth in TOC
        working_dir => $working_dir,           # directory containing Google Wiki files
        visited => {},                         # visited pages or sections of pages
        markdown_accum => '',                  # concatenated markdown for all pages/sections in TOC
        # No link has been opened yet. Starting out TRUE would make text()
        # silently drop every plain-text TOC entry that precedes the first link.
        in_link => FALSE,                      # are we processing text within a hyperlink?
        reference_files => \@reference_files,  # reference pages require special formatting
        anchor => {},                          # autogenerated HTML anchors for pages and sections
        anchor_count => 0,                     # used when autogenerating anchors above
    };
    return bless($self, $class);
}

# Accessor: returns the markdown accumulated so far in the handler's
# 'markdown_accum' slot.
sub get_generated_markdown {
    my ($self) = @_;
    my $accumulated = $self->{markdown_accum};
    return $accumulated;
}

#----------------------------------------------------------------------
# markdown parsing event handlers
#----------------------------------------------------------------------

# Parse event: an unordered list opens, so we are one level deeper in the TOC.
sub start_unordered_list {
    my ($self) = @_;
    return $self->{list_level}++;
}

# Parse event: an unordered list closes, so we move one level up in the TOC.
sub end_unordered_list {
    my ($self) = @_;
    return $self->{list_level}--;
}

# Parse event: a hyperlink opens.
#
# Links with a URI scheme are ignored. A relative link is treated as a
# reference to a wiki page (optionally "#section"): the page's .wiki file is
# read from the working directory, converted to markdown, and appended to the
# accumulated output -- unless this exact link, or the page as a whole, has
# already been pulled in.
#
#   %args - event arguments; only 'uri' is read here
sub start_link {

    my ($self, %args) = @_;
    my $visited = $self->{visited};
    my $uri = URI->new($args{uri});

    # only relative URIs are processed; anything with a scheme is left alone
    return if defined($uri->scheme());

    INFO(sprintf('processing TOC link %s', $uri->as_string()));

    my $page = $uri->path();
    my $anchor = $uri->fragment();

    if (!$visited->{$uri} && !$visited->{$page}) {

        my $file = catfile($self->{working_dir}, $page.'.wiki');
        my $google = read_file($file);
        my $markdown = GoogleWiki2Markdown::convert($google);

        # if link targets a specific section, only add that section
        if ($anchor) {
            my $header = $self->_google_anchor_to_header($anchor);
            # Assign to the outer $markdown. Declaring a fresh lexical here
            # would throw the extracted section away at the closing brace and
            # the whole page would be appended instead.
            my $section = $self->_extract_markdown_section($markdown, $header);
            if (defined $section) {
                $markdown = $section;
            }
            else {
                WARN(sprintf("section '%s' not found in %s; adding the whole page",
                    $header, $file));
            }
        }

        # prepend autogenerated anchors to headers and to the top of the page
        $markdown = $self->_add_anchors($page, $markdown);

        # reference pages require special formatting
        if (grep($file eq $_, @{$self->{reference_files}})) {
            $markdown = $self->_reformat_reference_page($markdown);
        }

        # adjust header levels according to where this page/section appears in the TOC
        $markdown = $self->_inc_markdown_header_levels($markdown, $self->{list_level} - 1);

        $self->{markdown_accum} .= $markdown . "\n";
        $visited->{$uri} = TRUE;

    }

    # remember that we are inside a link so text() does not turn the link
    # label into a header of its own
    $self->{link_uri} = $uri;
    $self->{in_link} = TRUE;

    return;
}

# Parse event: a hyperlink closes; clear the in-link flag.
sub end_link {
    my ($self) = @_;
    return $self->{in_link} = FALSE;
}

# Parse event: a run of text.
#
# Text that sits inside a list item but outside a link is a plain-text TOC
# entry; it is appended to the accumulated markdown as a header whose level
# equals the current list depth. All other text is ignored.
#
#   %args - event arguments; only 'text' is read here
sub text {

    my ($self, %args) = @_;
    my $text = $args{text};
    return unless defined $text;

    # trim both ends; two anchored substitutions also cope with text that
    # contains interior newlines
    $text =~ s/\A\s+//;
    $text =~ s/\s+\z//;

    # ignore whitespace text events (e.g. newlines at end of list items);
    # test the length so that an entry consisting of "0" is kept
    return unless length $text;

    # ignore text that isn't in a list item (e.g. text preceding TOC)
    return unless $self->{list_level} > 0;

    # add markdown headers for plain text entries in TOC
    if (!$self->{in_link}) {
        TRACE("adding header for plain text TOC entry: '$text'");
        $self->{markdown_accum} .= ('#' x $self->{list_level}) . " $text\n";
    }

    return;
}

# Parse event: the document is finished. Run the accumulated markdown through
# the wiki-link conversion and store the result back in place.
sub end_document {
    my ($self) = @_;
    my $converted = $self->_convert_wiki_links_to_html_links($self->{markdown_accum});
    return $self->{markdown_accum} = $converted;
}

#----------------------------------------------------------------------
# private methods
#----------------------------------------------------------------------

# Decorate a page's markdown with autogenerated HTML anchors: one at the very
# top for the page itself, and one in front of every line that starts with '#'.
#
#   $page     - page name, used to look up / generate the anchor names
#   $markdown - markdown text of the page
#
# Returns the decorated markdown.
sub _add_anchors {
    my ($self, $page, $markdown) = @_;

    # anchor for top of page
    my $page_anchor = $self->_get_anchor($page);
    $markdown = "<a name='" . $page_anchor . "'/>\n" . $markdown;

    # anchors for headers; the captures are copied into lexicals before any
    # method is called, and the header line is rebuilt from its two parts
    $markdown =~ s{^(\#+\s*)(.*[^\s])}{
        my ($marker, $title) = ($1, $2);
        "<a name='" . $self->_get_anchor($page, $title) . "'/>\n\n" . $marker . $title;
    }emg;

    return $markdown;
}

# Look up, or create on first use, the autogenerated anchor name for a page or
# for a header within a page. Names have the form 'anchorN', where N is taken
# from the handler's running anchor counter.
#
#   $page   - page name; a false value yields undef
#   $header - optional header text within the page
#
# Returns the anchor name.
sub _get_anchor {
    my ($self, $page, $header) = @_;
    return undef unless $page;

    my $anchor_for = ($self->{anchor} ||= {});
    my $key = $self->_get_google_wiki_link($page, $header);

    if (!$anchor_for->{$key}) {
        $anchor_for->{$key} = 'anchor' . $self->{anchor_count}++;
    }

    my $header_note = $header ? "/'$header'" : '';
    DEBUG(sprintf("generated anchor '#%s' for '%s'%s",
        $anchor_for->{$key}, $page, $header_note));

    return $anchor_for->{$key};
}

# Build the wiki-style link text for a page and/or a header on it:
# "<page>", "<page>#<header anchor>" or "#<header anchor>". When both
# arguments are false the result is undef.
sub _get_google_wiki_link {
    my ($self, $page, $header) = @_;

    my @pieces;
    push @pieces, $page if $page;
    push @pieces, "#" . $self->_get_google_header_anchor($header) if $header;

    return @pieces ? join('', @pieces) : undef;
}

# Turn header text into its anchor form by replacing every space with an
# underscore. The argument itself is not modified.
sub _get_google_header_anchor {
    my ($self, $header) = @_;
    (my $underscored = $header) =~ s/ /_/g;
    return $underscored;
}

# Rewrite markdown links that point at wiki pages/sections so that they point
# at the autogenerated in-document anchors instead.
#
#   $markdown - markdown text to process
#
# Only links whose target has an entry in the handler's anchor table are
# rewritten. Every other link (external URLs, targets that were never pulled
# in) is left exactly as written rather than being turned into a dead "(#)"
# link.
#
# Returns the processed markdown.
sub _convert_wiki_links_to_html_links {
    my ($self, $markdown) = @_;
    my $anchor_for = $self->{anchor} || {};

    # markdown links look like: [ <label> ]( <URI> )
    $markdown =~ s{\[([^\[\]]+)\]\(([^\(\)]+)\)}{
        my ($label, $target) = ($1, $2);
        defined $anchor_for->{$target}
            ? sprintf('[%s](#%s)', $label, $anchor_for->{$target})
            : sprintf('[%s](%s)', $label, $target);
    }eg;

    return $markdown;
}

# Push every header in $markdown $inc levels deeper: the first '#' of each
# line that starts with one is replaced by $inc + 1 of them. A non-positive
# $inc leaves the text untouched.
sub _inc_markdown_header_levels {
    my ($self, $markdown, $inc) = @_;

    if ($inc > 0) {
        my $hashes = '#' x ($inc + 1);
        $markdown =~ s/^\#/$hashes/mg;
    }

    return $markdown;
}

# Turn an anchor back into header text by replacing every underscore with a
# space. The argument itself is not modified.
sub _google_anchor_to_header {
    my ($self, $anchor) = @_;
    (my $spaced = $anchor) =~ s/_/ /g;
    return $spaced;
}

# Extract the body of one section from a markdown document.
#
#   $markdown - markdown text to search
#   $header   - literal header text of the wanted section
#
# Returns everything between the matching header line and the next line that
# starts with '#' (or the end of the text); the header line itself is not
# included. Returns undef when no such header exists.
sub _extract_markdown_section {
    my ($self, $markdown, $header) = @_;
    return undef unless defined $markdown && defined $header;

    # the header is plain text, not a pattern: escape it so that characters
    # such as '+', '(' or '?' neither break the regex nor change its meaning
    my $literal = quotemeta $header;

    # only trust $1 after a successful match; after a failed match it still
    # holds whatever an earlier match left behind
    if ($markdown =~ /^\#+\s*$literal\s*\#*\s*^(.*?)(?=^\#|\z)/ms) {  # \z is EOF
        return $1;
    }

    return undef;
}

# Reformat a reference page: every section introduced by a header of level 2
# or higher becomes a <dl> entry, with the header text used both as the anchor
# name and as the bracketed label, and the section body as the definition.
#
#   $markdown - markdown text of the reference page
#
# Returns the reformatted text.
sub _reformat_reference_page {
    my ($self, $markdown) = @_;

    my $citation_re = qr{
        ^\#\#+ \s* ([^\r\n]*[^\s]) \s*  # header, level 2 or higher
        (.*?)                           # content of citation
        (?=(^\#)|\z)                    # stop at the next header or at EOF
    }msx;

    # note: it is important that the <dl> tags appear in the leftmost column,
    # otherwise markdown will treat it as a code figure
    $markdown =~ s/$citation_re/
        my ($label, $content) = ($1, $2);
        "\n<dl>\n"
            . "    <dt><a name='" . $label . "'>[" . $label . "]<\/a><\/dt>\n"
            . "    <dd>" . $content . "<\/dd>\n"
            . "<\/dl>\n"
            . "    ";
    /eg;

    return $markdown;
}

1;
